{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Checkout my [Twitter(@rohanpaul_ai)](https://twitter.com/rohanpaul_ai) for daily LLM bits"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  Inferencing on Mistral 7B LLM with 4-bit quantization 🚀 - In FREE Google Colab\n",
    "\n",
    "# [Link to my Youtube Video Explaining this whole Notebook](https://www.youtube.com/watch?v=eovBbABk3hw&list=PLxqBkZuBynVTzqUQCQFgetR97y1X_1uCI&index=10&ab_channel=Rohan-Paul-AI)\n",
    "\n",
    "[![Imgur](https://imgur.com/Lz4ov4K.png)](https://www.youtube.com/watch?v=eovBbABk3hw&list=PLxqBkZuBynVTzqUQCQFgetR97y1X_1uCI&index=10&ab_channel=Rohan-Paul-AI)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install git+https://github.com/huggingface/transformers -q peft  accelerate bitsandbytes safetensors sentencepiece"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
    "\n",
    "model_name = 'bn22/Mistral-7B-Instruct-v0.1-sharded'\n",
    "\n",
    "def load_quantized_model(model_name: str):\n",
    "    \"\"\"\n",
    "    :param model_name: Name or path of the model to be loaded.\n",
    "    :return: Loaded quantized model.\n",
    "    \"\"\"\n",
    "    bnb_config = BitsAndBytesConfig(\n",
    "        load_in_4bit=True,\n",
    "        bnb_4bit_use_double_quant=True,\n",
    "        bnb_4bit_quant_type=\"nf4\",\n",
    "        bnb_4bit_compute_dtype=torch.bfloat16\n",
    "    )\n",
    "\n",
    "    model = AutoModelForCausalLM.from_pretrained(\n",
    "        model_name,\n",
    "        load_in_4bit=True,\n",
    "        torch_dtype=torch.bfloat16,\n",
    "        quantization_config=bnb_config\n",
    "    )\n",
    "\n",
    "    return model\n",
    "\n",
    "def initialize_tokenizer(model_name: str):\n",
    "    \"\"\"\n",
    "    Initialize the tokenizer with the specified model_name.\n",
    "\n",
    "    :param model_name: Name or path of the model for tokenizer initialization.\n",
    "    :return: Initialized tokenizer.\n",
    "    \"\"\"\n",
    "    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
    "    tokenizer.bos_token_id = 1  # Set beginning of sentence token id\n",
    "    return tokenizer\n",
    "\n",
    "\n",
    "model = load_quantized_model(model_name)\n",
    "\n",
    "tokenizer = initialize_tokenizer(model_name)\n",
    "\n",
    "# Define stop token ids\n",
    "stop_token_ids = [0]\n",
    "\n",
    "text = \"[INST] How AI will replace Engineers [/INST]\"\n",
    "\n",
    "encoded = tokenizer(text, return_tensors=\"pt\", add_special_tokens=False)\n",
    "model_input = encoded\n",
    "generated_ids = model.generate(**model_input, max_new_tokens=200, do_sample=True)\n",
    "decoded = tokenizer.batch_decode(generated_ids)\n",
    "print(decoded[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
